clear all

* Root folder of the replication package; every other path is built from it
global path "~/Dropbox/IT_Revolution/Replication_package/JPE submission"

* Sub-folders used by this script: output, cleaned data, raw inputs
global path_output "$path/Output"
global path_cleandata "$path/Clean_data"
global path_rawdata "$path/Raw_data"

* Create the output folder; capture suppresses the error raised when it already exists
capture mkdir "$path_output"

*** Estimate regressions for manufacturing exposure, 1900-1940
********************************************************************************
use "$path_cleandata/data_occ_1900_1940_s1_a29distance.dta", clear

*Step 1. Create variables for estimation
********************************************************************************

*log employment change since 1900; period p = 1,...,4 ends in 1910,...,1940
forvalues p = 1/4 {
	local endyear = 1900 + 10*`p'
	gen change_emp`p' = log(emp`endyear'/emp1900)
}

*select occupations with positive employment in 1900 and 1940:
*the 1900-1940 log change must be non-missing in exactly three rows of the occupation
gen aind = !missing(change_emp4)
egen ind = total(aind), by(occ)
keep if ind == 3

*weights: 1900 employment of the group == 0 row, spread to every row of the occupation
gen aemp1900 = emp1900 if group == 0
egen emp_all0 = mean(aemp1900), by(occ)

*standardized regressors: subtract the weighted mean and divide by the weighted sd,
*both computed on the group == 0 rows
summarize manuf1900 if group == 0 [aw=emp_all0]
gen exposure_manuf = (manuf1900 - `r(mean)')/`r(sd)'

summarize elect_share_noutil if group == 0 [aw=emp_all0]
gen exp_elec = (elect_share_noutil - `r(mean)')/`r(sd)'

*unweighted z-score of ad_ent
egen aad_ent = std(ad_ent)

*Step 2. Regressions for all workers - Figure 1.A 
********************************************************************************
local exposure exposure_manuf
local controls

* one weighted regression per period on the group == 0 rows,
* standard errors clustered by occupation; stored as per1_emp ... per4_emp
forvalues p = 1/4 {
	regress change_emp`p' `exposure' `controls' if group == 0 [aw = emp_all0], vce(cluster occ)
	estimates store per`p'_emp
}
 
*Step 3. Regressions for worker generations - Figure 1.C and 1.D 
********************************************************************************
local exposure exposure_manuf
local controls

* rows with group >= 1 only: group-specific exposure slopes plus group intercepts,
* no common exposure term; stored as per1_group_emp ... per4_group_emp
forvalues p = 1/4 {
	regress change_emp`p' c.`exposure'#i.group i.group `controls' if group >= 1 [aw = emp_all0], vce(cluster occ)
	estimates store per`p'_group_emp
}

* Step 4: Baseline specification for Table A2
********************************************************************************
local exposure exposure_manuf
local controls 

* The regressor of interest is always copied into `variable', so every stored
* model carries its coefficient under the same name; `aux_variable' holds the
* companion interaction term.
gen variable = `exposure'
gen aux_variable = .

forvalues p = 1/4 {

	* (a) all workers (group == 0)
	replace variable = `exposure'
	replace aux_variable = .
	regress change_emp`p' variable `controls' if group == 0 [aw = emp_all0], vce(cluster occ)
	estimates store per`p'_emp_base

	* (b) group-specific slopes: group g goes in `variable', the other group in
	*     `aux_variable'; stored as per`p'_group1_emp_base and per`p'_group2_emp_base
	forvalues g = 1/2 {
		local other = 3 - `g'
		replace variable = c.`exposure'#`g'.group
		replace aux_variable = c.`exposure'#`other'.group
		regress change_emp`p' variable aux_variable i.group `controls' if group >= 1 [aw = emp_all0], vce(cluster occ)
		estimates store per`p'_group`g'_emp_base
	}

	* (c) common exposure term plus both interactions
	* NOTE(review): if group only takes the values 1 and 2 in this sample, the three
	* exposure terms are collinear and Stata omits one of them - confirm which.
	replace variable = c.`exposure'#1.group
	replace aux_variable = c.`exposure'#2.group
	regress change_emp`p' `exposure' variable aux_variable i.group `controls' if group >= 1 [aw = emp_all0], vce(cluster occ)
	estimates store per`p'_dgroup_emp_base

}

* Step 5: Robustness wrt exposure measure for Table A2
********************************************************************************
local controls 

* Same four specifications as the baseline, with the alternative exposure
* measure and periods 2 and 4 only. `variable' and `aux_variable' must already
* exist in the data; they are overwritten before each regression.
foreach exposure of varlist exp_elec {
	foreach p in 2 4 {

		* (a) all workers (group == 0)
		replace variable = `exposure'
		replace aux_variable = .
		regress change_emp`p' variable `controls' if group == 0 [aw = emp_all0], vce(cluster occ)
		estimates store per`p'_emp_`exposure'

		* (b) group-specific slopes, group g reported through `variable'
		forvalues g = 1/2 {
			local other = 3 - `g'
			replace variable = c.`exposure'#`g'.group
			replace aux_variable = c.`exposure'#`other'.group
			regress change_emp`p' variable aux_variable i.group `controls' if group >= 1 [aw = emp_all0], vce(cluster occ)
			estimates store per`p'_group`g'_emp_`exposure'
		}

		* (c) common exposure term plus both interactions
		replace variable = c.`exposure'#1.group
		replace aux_variable = c.`exposure'#2.group
		regress change_emp`p' `exposure' variable aux_variable i.group `controls' if group >= 1 [aw = emp_all0], vce(cluster occ)
		estimates store per`p'_dgroup_emp_`exposure'
	}
}

* Step 7: Employment by exposure and distance groups for Figure 2
********************************************************************************
use "$path_cleandata/data_occ_1900_1940_s1_a29distance.dta", clear

*create variables for regressions
gen change_emp4 = log(emp1940/emp1900)

*Select occupations with positive employment in 1900 and 1940
*(the 1900-1940 log change must be non-missing in exactly three rows per occupation)
gen aind = change_emp4 != .
egen ind = total(aind), by(occ)
keep if ind == 3

*Weights: 1900 employment of the group == 0 row, spread to every row of the occupation
gen aemp1900 = emp1900 if group == 0
egen emp_all0 = mean(aemp1900), by(occ)

*Standardized exposure measures (weighted mean and sd taken over group == 0 rows)
foreach var of varlist manuf1900 elect_share_noutil {
	sum `var' if group == 0 [aw=emp_all0]
	gen exposure_`var' = (`var' - `r(mean)')/`r(sd)' 
}
rename (exposure_manuf1900 exposure_elect_share_noutil) (exposure_manuf exp_elec)

*define groups of high/low exposure and distance
local exposure exposure_manuf
local dist ad_ent

* Stata treats a missing value as larger than any number, so an unguarded
* "x > cutoff" would code rows with missing x as 1. The !missing() guards leave
* those rows unclassified (missing) instead.
sum `exposure' if group == 0 , d
gen high_exp = `exposure' > r(p75) if !missing(`exposure')

sum `dist' if group == 0 , d
gen high_dist = `dist' > r(p50) if !missing(`dist')

*compute employment shares within group (totals are taken over all rows of the group)
foreach var of varlist emp1900 emp1910 emp1920 emp1930 emp1940 {
	egen tot_emp = total(`var'), by(group)
	replace `var' = `var'/tot_emp
	drop tot_emp
}

*save temp file with high exp occupation list
preserve
	keep if group == 0
	keep occ high_exp exposure_manuf exp_elec
	rename high_exp high_exp75
	count
	save "$path_cleandata/temp_high_exp_early.dta", replace
restore

*rows without a valid exposure/distance classification cannot be assigned to a
*cell; report how many there are and leave them out of the 2x2 collapse
count if missing(high_exp, high_dist)
drop if missing(high_exp, high_dist)

*collapse emp by pairs of high/low exposure/distance
collapse (sum) emp1900 emp1910 emp1920 emp1930 emp1940 (count) occ, by(high_exp high_dist group)

sort group high_exp high_dist

*log change of each cell's employment share relative to 1900
foreach p of numlist 1910 1920 1930 1940 {
	gen dlemp`p' = log( emp`p') - log( emp1900 )
}

*create output file for plot
keep if group == 0
keep high_exp high_dist dl*

save "$path_cleandata/exp_dist_plotManuf.dta", replace

* Step 8: Robustness wrt sample for Table A2
********************************************************************************
forvalues spec = 2/3 {
	use "$path_cleandata/data_occ_1900_1940_s`spec'_a29.dta", clear

	*log employment change since 1900; period p = 1,...,4 ends in 1910,...,1940
	forvalues p = 1/4 {
		local endyear = 1900 + 10*`p'
		gen change_emp`p' = log(emp`endyear'/emp1900)
	}

	*select occupations with positive employment in 1900 and 1940
	gen aind = !missing(change_emp4)
	egen ind = total(aind), by(occ)
	keep if ind == 3

	*weights: 1900 employment of the group == 0 row, spread within occupation
	gen aemp1900 = emp1900 if group == 0
	egen emp_all0 = mean(aemp1900), by(occ)

	*standardized exposure measure
	summarize manuf1900 if group == 0 [aw=emp_all0]
	gen exposure_manuf = (manuf1900 - `r(mean)')/`r(sd)'

	*estimates
	local exposure exposure_manuf
	local controls 

	gen variable = `exposure'
	gen aux_variable = .

	foreach p in 2 4 {

		* (a) all workers (group == 0); aux_variable is not a regressor here
		replace variable = `exposure'
		replace aux_variable = `exposure'
		regress change_emp`p' variable `controls' if group == 0 [aw = emp_all0], vce(cluster occ)
		estimates store per`p'_emp_s`spec'

		* (b) group-specific slopes, group g reported through `variable'
		forvalues g = 1/2 {
			local other = 3 - `g'
			replace variable = c.`exposure'#`g'.group
			replace aux_variable = c.`exposure'#`other'.group
			regress change_emp`p' variable aux_variable i.group `controls' if group >= 1 [aw = emp_all0], vce(cluster occ)
			estimates store per`p'_group`g'_emp_s`spec'
		}

		* (c) common exposure term plus both interactions
		replace variable = c.`exposure'#1.group
		replace aux_variable = c.`exposure'#2.group
		regress change_emp`p' `exposure' variable aux_variable i.group `controls' if group >= 1 [aw = emp_all0], vce(cluster occ)
		estimates store per`p'_dgroup_emp_s`spec'
	}

}

* Step 9: Robustness wrt age cutoff for Figure A3
********************************************************************************
forvalues Y = 25/35 {
	use "$path_cleandata/data_occ_1900_1940_s1_a`Y'.dta", clear

	*create variables for regressions
	gen change_emp4 = log(emp1940/emp1900)

	*Select occupations with positive employment in 1900 and 1940
	gen aind = !missing(change_emp4)
	egen ind = total(aind), by(occ)
	keep if ind == 3

	*Weights: 1900 employment of the group == 0 row, spread within occupation
	gen aemp1900 = emp1900 if group == 0
	egen emp_all0 = mean(aemp1900), by(occ)

	*Standardized exposure measure
	summarize manuf1900 if group == 0 [aw=emp_all0]
	gen exposure_manuf = (manuf1900 - `r(mean)')/`r(sd)'

	local exposure exposure_manuf

	*common exposure slope plus a group-1 interaction, one model per cutoff file.
	*`controls' is not set in this step: it keeps whatever an earlier step left in it.
	regress change_emp4 `exposure' c.`exposure'#1.group i.group `controls' if group >= 1 [aw = emp_all0], vce(cluster occ)
	estimates store per4_difgroup_emp_a`Y'
}

*plot estimates: Figure A3 
* One row per age cutoff (p = 1,...,11 for cutoffs 25,...,35) holding the
* group-1 interaction coefficient, its standard error and a 90% interval.
clear
set obs 11
gen p = _n
gen coef = .
gen SE = .

* Full, canonical name of the interaction term in the stored
* per4_difgroup_emp_a* models. The lookups are deliberately not wrapped in
* capture: if the coefficient cannot be found the script must stop rather than
* export a file of missing values.
local term 1.group#c.exposure_manuf
forvalues Y = 25/35 {
	estimates restore per4_difgroup_emp_a`Y'
	replace coef = _b[`term'] if p == `Y' - 24
	replace SE = _se[`term'] if p == `Y' - 24
}

*Compute 90CI (scalar() makes sure the scalar, not a like-named variable, is used)
scalar sig90 = 0.1
gen up_sig90 = coef + invnormal(1 - scalar(sig90)/2)*SE
gen dn_sig90 = coef - invnormal(1 - scalar(sig90)/2)*SE

export delimited using "$path_cleandata/FigA3_multiplecutoff_early.csv", replace



* Step 10: Export the estimates
********************************************************************************

*10.1: Export csv files with the period-by-period estimates used for Figure 1
* One file per specification, one row per period (p = 1,...,4):
*   emp             -> coef/SE         : exposure coefficient
*   group_emp       -> coef1/SE1, coef2/SE2 : group-specific exposure slopes
*   dgroup_emp_base -> coef/SE         : coefficient on `variable'
* Columns that do not apply to a specification are left missing. Each
* specification reads exactly the coefficients it contains, so a missing or
* misspelled coefficient stops the script instead of silently producing an
* empty column.
local exposure exposure_manuf
scalar sig2 = 0.1

foreach spec in emp group_emp dgroup_emp_base {
	clear
	set obs 4
	gen p = _n
	foreach v in coef SE coef1 SE1 coef2 SE2 {
		gen `v' = .
	}

	*import estimates
	forvalues N = 1/4 {
		estimates restore per`N'_`spec'
		if "`spec'" == "emp" {
			replace coef = _b[`exposure'] if p == `N'
			replace SE = _se[`exposure'] if p == `N'
		}
		else if "`spec'" == "group_emp" {
			forvalues i = 1/2 {
				replace coef`i' = _b[`i'.group#c.`exposure'] if p == `N'
				replace SE`i' = _se[`i'.group#c.`exposure'] if p == `N'
			}
		}
		else {
			replace coef = _b[variable] if p == `N'
			replace SE = _se[variable] if p == `N'
		}
	}

	*compute 90CI
	gen up = coef + invnormal(1 - scalar(sig2)/2)*SE
	gen dn = coef - invnormal(1 - scalar(sig2)/2)*SE
	forvalues i = 1/2 {
		gen up`i' = coef`i' + invnormal(1 - scalar(sig2)/2)*SE`i'
		gen dn`i' = coef`i' - invnormal(1 - scalar(sig2)/2)*SE`i'
	}

	*keep the column order of the exported files unchanged
	order p coef SE coef1 SE1 up dn up1 dn1 coef2 SE2 up2 dn2
	export delimited using "$path_cleandata/early_manuf_`spec'.csv", replace
}

*10.2: Figure A2
import excel "$path_rawdata/wage_early.xlsx", sheet("data") firstrow clear
* the first data row is discarded before converting everything to numeric
* (presumably a units/description row - confirm against the workbook)
drop in 1
destring *, replace

* rebuild the d781/d780 ratio from its two components
drop d781_d779 d781_d780
gen d781_d780 = d781/d780

tsset year, year

* log ratio; years without a ratio are dropped
gen id = 1
gen ld781_d780 = log(d781_d780)
drop if missing(d781_d780)
keep year id ld781_d780

* one row with a column per year, so each year can be compared with 1900
reshape wide ld781_d780, i(id) j(year)

forvalues yr = 1890/1926 {
	gen change_waged781_d780`yr' = ld781_d780`yr' - ld781_d7801900
}

* back to one row per year (p), keeping 1900 onwards
keep id change_waged*
reshape long change_waged781_d780, i(id) j(p)
drop id

keep if p >= 1900

twoway (line change_waged781_d780 p, lc(gray) lw(thick)), ///
	yline(0, lc(black) lw(thick)) ylabel(-0.08(0.02)0.08) ytitle("") ///
	xlabel(1900(5)1930) xtitle("") graphregion(color(white))
graph export "$path_output/FigA2_wage_manuf.png", as(png) replace
graph export "$path_output/FigA2_wage_manuf.eps", as(eps) replace

export delimited using "$path_cleandata/early_wage.csv", replace

*10.3: Table A2 (Panel C): Effect of manufacturing on employment
* Four rows are appended to TableA2_robust.tex. Every row reports only the
* coefficient named `variable' from eight stored models: the four
* specifications for period 2, then the same four for period 4.

* esttab options common to all rows
local cellopts keep(variable) cells(b(star fmt(%9.3f)) se(par)) starlevels(* 0.10 ** 0.05 *** 0.01) stats(,) fragment booktabs style(tex)
local tailopts collabels(none) mlabels(none) nonumbers nocons nor2 noobs nonotes substitute(\_ \ \midrule "") append

* model list for each row, e.g. per2_emp_base ... per4_dgroup_emp_base
foreach tag in base exp_elec s2 s3 {
	local models_`tag'
	foreach p in 2 4 {
		foreach kind in emp group1_emp group2_emp dgroup_emp {
			local models_`tag' `models_`tag'' per`p'_`kind'_`tag'
		}
	}
}

* Baseline
esttab `models_base' using "$path_output/TableA2_robust.tex", `cellopts' ///
	posthead("\multicolumn{9}{l}{\large{\textit{Panel C}: Effect of Manufacturing exposure on relative employment}} \\[2pt]") ///
	varlabels(variable "Baseline", end("[4pt]")) ///
	`tailopts'

* Alternative exposure measure
esttab `models_exp_elec' using "$path_output/TableA2_robust.tex", `cellopts' ///
	posthead("\multicolumn{9}{l}{Alternative exposure measure} \\") ///
	varlabels(variable "\hspace{3mm}Electricity exposure", end("[4pt]")) ///
	`tailopts'

* Alternative sample
esttab `models_s2' using "$path_output/TableA2_robust.tex", `cellopts' ///
	posthead("\multicolumn{9}{l}{Alternative sample} \\") ///
	varlabels(variable "\hspace{3mm}Only U.S. natives") ///
	`tailopts'
* last row also closes the tabular/adjustbox environments
esttab `models_s3' using "$path_output/TableA2_robust.tex", `cellopts' ///
	varlabels(variable "\hspace{3mm}Only white") postfoot("\bottomrule  \end{tabular} \end{adjustbox}") ///
	`tailopts'

		
*10.4: Table A1
* Top-10 occ1990dd occupations by each exposure measure, side by side.

use "$path_cleandata/temp_high_exp_early.dta", clear

* attach the occ1990dd code and keep matched occupations only
merge 1:1 occ using "$path_rawdata/occ1950_occ1990dd.dta"
keep if _merge == 3

drop occ _merge

* set the crosswalked data aside so both rankings start from the same rows
tempfile crosswalked rank_occ_manuf
save `crosswalked'

* ranking by manufacturing exposure
collapse (mean) exposure_manuf, by(occ1990dd)
merge 1:1 occ1990dd using "$path_rawdata/occ1990dd_desciption.dta"
keep if _merge == 3
gsort -exposure_manuf
gen rank = _n
drop if rank > 10
keep rank occ
rename occ occ_manuf
save `rank_occ_manuf'

* ranking by electricity exposure
use `crosswalked', clear
collapse (mean) exp_elec, by(occ1990dd)
merge 1:1 occ1990dd using "$path_rawdata/occ1990dd_desciption.dta"
keep if _merge == 3
gsort -exp_elec
gen rank = _n
drop if rank > 10
keep rank occ
rename occ occ_elect

* put the two rankings next to each other
merge 1:1 rank using `rank_occ_manuf'
drop _merge
order rank occ_manuf occ_elect
